library(Seurat)
library(canceRbits)
library(dplyr)
library(patchwork)
library(DT)
library(SCpubr)
library(tibble)
library(dittoSeq)
library(scRepertoire)
library(reshape2)
library(viridis)
library(tidyr)
library(grDevices)
library(ggpubr)
library(ggplot2)
library(ggnewscale)
library(RColorBrewer)
library(scales)
library(enrichplot)
library(clusterProfiler)
library("org.Hs.eg.db")
library(DOSE)
library(msigdbr)
library(stringr)# Load seurat object containing single cell pre-processed and annotated data, in the metadata folder
srat <- readRDS(params$path_to_data)
meta <- srat@meta.data
meta$WHO <- "SD"
meta$WHO[meta$patient %in% c("NeoBCC007_post", "NeoBCC008_post", "NeoBCC012_post", "NeoBCC017_post")] <- "CR"
meta$WHO[meta$patient %in% c("NeoBCC004_post", "NeoBCC006_post", "NeoBCC010_post", "NeoBCC011_post")] <- "PR"
srat <- AddMetaData(srat, meta$WHO, col.name = "WHO")
srat$WHO <- factor(srat$WHO, levels = c("CR", "PR", "SD"))
s <- subset(srat, subset = anno_l1 %in% c("B cells","Plasma cells"))
s@meta.data$seurat_clusters <- factor(s@meta.data$seurat_clusters, levels=c("34","7", "21", "38",
"15", "22","3", "4", "24"))colors_B <- c(
"34" = "#ae017e",
"7" = "#377eb8",
"21" = "#f781bf",
"38" = "#fed976",
"15" = "#a6cee3" ,
"22" = "#e31a1c" ,
"3" = "#9e9ac8",
"4" = "#fdb462" ,
"24" = "#b3de69"
)
colors_clonotypes = c("Small (1e-04 < X <= 0.001)" = "#8c6bb1",
"Medium (0.001 < X <= 0.01)" = "#41b6c4",
"Large (0.01 < X <= 0.1)" = "#fec44f",
"Hyperexpanded (0.1 < X <= 1)" = "#ce1256"
).grabMeta from scRepertoirewe redefine here .grabMeta from
scRepertoire
#This is to grab the meta data from a seurat or SCE object
#' @importFrom SingleCellExperiment colData
#' @importFrom SeuratObject Idents
#' @importFrom methods slot
#' @keywords internal
.grabMeta <- function(sc) {
meta <- data.frame(sc[[]], slot(sc, "active.ident"))
colnames(meta)[length(meta)] <- "ident"
return(meta)
}This is the code used to create the combined expression
object from the BCR-Seq data. Due to personal confidentiality, the
object cannot be provided to GEO, but raw data are available on request
on EGA.
As default, the chunk related to BCR-Seq are not run.
if(params$accessibility == "unlock"){
combined <- readRDS(file.path("../",params$path_to_BCR))
srat <- RenameCells(srat,new.names = paste0(srat$patient, "_",gsub("(_1|_2|_3|_4|_5|_6|_7|_8|_9)*", "",colnames(srat))))
srat <- RenameCells(srat,new.names = gsub("_post", "",colnames(srat)))
srat$sample <- gsub("_post", "",srat$patient)
seurat <- combineExpression(combined, srat,
cloneCall = "strict",
group.by = "sample", chain = "both",
proportion = TRUE,
filterNA = TRUE)
s <- subset(seurat, subset = anno_l1 %in% c("B cells","Plasma cells") )
s@meta.data$cloneType <- factor(s@meta.data$cloneType, levels = unique(s$cloneType))
ss <- subset(s, subset = anno_l1 %in% c("Plasma cells"))
}else{
print("Please request access to the BCR-Seq data.")
}if(params$accessibility == "unlock"){
Idents(s) <- s$seurat_clusters
s@meta.data$seurat_clusters <- factor(s@meta.data$seurat_clusters, levels=c("34","7", "21", "38",
"15", "22","3", "4", "24"))
tmp <- data.frame(.grabMeta(s), identity = s$seurat_clusters, s@reductions[["umap"]]@cell.embeddings)
tmp$include <- ifelse(tmp$Frequency >= 0.1, "Yes", NA)
tmp2 <- subset(tmp, include == "Yes")
p1 <- ggplot(tmp2, mapping = aes(x=tmp2$UMAP_1, y = tmp2$UMAP_2)) +
geom_point(tmp, mapping = aes(x=tmp$UMAP_1, y = tmp$UMAP_2, color = tmp[,"identity"]), size= 1) +
geom_density_2d(color = "black", lwd=0.5, bins = 8) +
theme_minimal() +
labs(color = "B cell subsets") +
theme(text = element_text(size = 26), legend.key.width = unit(1, "cm"), legend.position = "right", legend.text = element_text(size=20)) + guides(colour = guide_legend(override.aes = list(size=8))) +
scale_colour_manual(values = colors_B, breaks = unique(s$seurat_clusters)) +
xlab("UMAP 1") + ylab("UMAP 2") + ggtitle(" Density of \n hyperexpanded") + ylim(-7,20)
print(p1)
DT::datatable(p1$data,
caption = ("Extended Data Figure 9Ca"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))
tmp <- data.frame(.grabMeta(s), identity = s$seurat_clusters, s@reductions[["umap"]]@cell.embeddings)
tmp$include <- ifelse(tmp$Frequency >= 0.01 & tmp$Frequency <= 0.1 , "Yes", NA)
tmp2 <- subset(tmp, include == "Yes")
p2 <- ggplot(tmp2, mapping = aes(x=tmp2$UMAP_1, y = tmp2$UMAP_2)) +
geom_point(tmp, mapping = aes(x=tmp$UMAP_1, y = tmp$UMAP_2, color = tmp[,"identity"]), size= 1) +
geom_density_2d(color = "black", lwd=0.5, bins = 8) +
theme_minimal() +
labs(color = "B cell subsets") +
theme(text = element_text(size = 26), legend.key.width = unit(1, "cm"), legend.position = "right", legend.text = element_text(size=20)) + guides(colour = guide_legend(override.aes = list(size=8))) +
scale_colour_manual(values = colors_B, breaks = levels(s$seurat_clusters)) +
xlab("UMAP 1") + ylab("UMAP 2") + ggtitle(" Density of \n large clones") + ylim(-7,20)
print(p2)
DT::datatable(p2$data,
caption = ("Extended Data Figure 9Cb"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))
tmp <- data.frame(.grabMeta(s), identity = s$seurat_clusters, s@reductions[["umap"]]@cell.embeddings)
tmp$include <- ifelse(tmp$Frequency >= 0.001 & tmp$Frequency <= 0.01 , "Yes", NA)
tmp2 <- subset(tmp, include == "Yes")
p3 <- ggplot(tmp2, mapping = aes(x=tmp2$UMAP_1, y = tmp2$UMAP_2)) +
geom_point(tmp, mapping = aes(x=tmp$UMAP_1, y = tmp$UMAP_2, color = tmp[,"identity"]), size= 1) +
geom_density_2d(color = "black", lwd=0.5, bins = 8) +
theme_minimal() +
labs(color = "B cell subsets") +
theme(text = element_text(size = 26), legend.key.width = unit(1, "cm"), legend.position = "right", legend.text = element_text(size=20)) + guides(colour = guide_legend(override.aes = list(size=8))) +
scale_colour_manual(values = colors_B, breaks = levels(s$seurat_clusters)) +
xlab("UMAP 1") + ylab("UMAP 2") + ggtitle(" Density of \n medium clones") + ylim(-7,20)
print(p3)
DT::datatable(p3$data,
caption = ("Extended Data Figure 9Cc"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))
tmp <- data.frame(.grabMeta(s), identity = s$seurat_clusters, s@reductions[["umap"]]@cell.embeddings)
tmp$include <- ifelse(tmp$Frequency >= 0.0001 & tmp$Frequency <= 0.001 , "Yes", NA)
tmp2 <- subset(tmp, include == "Yes")
p4 <- ggplot(tmp2, mapping = aes(x=tmp2$UMAP_1, y = tmp2$UMAP_2)) +
geom_point(tmp, mapping = aes(x=tmp$UMAP_1, y = tmp$UMAP_2, color = tmp[,"identity"]), size= 1) +
geom_density_2d(color = "black", lwd=0.5, bins = 8) +
theme_minimal() +
labs(color = "B cell subsets") +
theme(text = element_text(size = 26), legend.key.width = unit(1, "cm"), legend.position = "right", legend.text = element_text(size=20)) + guides(colour = guide_legend(override.aes = list(size=8))) +
scale_colour_manual(values = colors_B, breaks = levels(s$seurat_clusters)) +
xlab("UMAP 1") + ylab("UMAP 2") + ggtitle(" Density of \n small clones") + ylim(-7,20)
print(p4)
DT::datatable(p4$data,
caption = ("Extended Data Figure 9Cd"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))
}else{
print("Please request access to the BCR-Seq data.")
}# scRepertoire functions
#Use to shuffle between chains
off.the.chain <- function(dat, chain, cloneCall) {
chain1 <- toupper(chain) #to just make it easier
if (chain1 %in% c("TRA", "TRD", "IGH")) {
x <- 1
} else if (chain1 %in% c("TRB", "TRG", "IGL")) {
x <- 2
} else {
warning("It looks like ", chain, " does not match the available options for `chain = `")
}
dat[,cloneCall] <- str_split(dat[,cloneCall], "_", simplify = TRUE)[,x]
return(dat)
}
#Remove list elements that contain all NA values
checkBlanks <- function(df, cloneCall) {
for (i in seq_along(df)) {
if (length(df[[i]][,cloneCall]) == length(which(is.na(df[[i]][,cloneCall]))) |
length(which(!is.na(df[[i]][,cloneCall]))) == 0 |
nrow(df[[i]]) == 0) {
df[[i]] <- NULL
} else {
next()
}
}
return(df)
}
#Ensure df is in list format
checkList <- function(df) {
df <- if(is(df)[1] != "list") list(df) else df
return(df)
}
checkContigs <- function(df) {
df <- lapply(seq_len(length(df)), function(x) {
df[[x]] <- if(is(df[[x]])[1] != "data.frame") as.data.frame(df[[x]]) else df[[x]]
df[[x]][df[[x]] == ""] <- NA
df[[x]] <- df[[x]][with(df[[x]], order(reads, chain)),]
})
}
#' @importFrom dplyr bind_rows
bound.input.return <- function(df) {
if (inherits(x=df, what ="Seurat") | inherits(x=df, what ="SummarizedExperiment")) {
df <- grabMeta(df)
} else {
df <- bind_rows(df, .id = "element.names")
}
return(df)
}
list.input.return <- function(df, split.by) {
if (inherits(x=df, what ="Seurat") | inherits(x=df, what ="SummarizedExperiment")) {
if(is.null(split.by)){
split.by <- "cluster"
}
df <- expression2List(df, split.by)
}
return(df)
}
#Get UMAP or other coordinates
#' @importFrom SingleCellExperiment reducedDim
get.coord <- function(sc, reduction) {
if (is.null(reduction)) {
reduction = "pca"
}
if (inherits(x=sc, what ="Seurat")) {
coord <- sc@reductions[[reduction]]@cell.embeddings
} else if (inherits(x=sc, what ="SummarizedExperiment")) {
coord <- reducedDim(sc, reduction)
}
return(coord)
}
#This is to check the single-cell expression object
checkSingleObject <- function(sc) {
if (!inherits(x=sc, what ="Seurat") &&
!inherits(x=sc, what ="SummarizedExperiment")){
stop("Object indicated is not of class 'Seurat' or
'SummarizedExperiment', make sure you are using
the correct data.") }
}
#This is to grab the meta data from a seurat or SCE object
#' @importFrom SingleCellExperiment colData
grabMeta <- function(sc) {
if (inherits(x=sc, what ="Seurat")) {
meta <- data.frame(sc[[]], slot(sc, "active.ident"))
if ("cluster" %in% colnames(meta)) {
colnames(meta)[length(meta)] <- "cluster.active.ident"
} else {
colnames(meta)[length(meta)] <- "cluster"
}
}
else if (inherits(x=sc, what ="SummarizedExperiment")){
meta <- data.frame(colData(sc))
rownames(meta) <- sc@colData@rownames
clu <- which(colnames(meta) == "ident")
if ("cluster" %in% colnames(meta)) {
colnames(meta)[clu] <- "cluster.active.idents"
} else {
colnames(meta)[clu] <- "cluster"
}
}
return(meta)
}
#This is to add the sample and ID prefixes for combineBCR()/TCR()
modifyBarcodes <- function(df, samples, ID) {
out <- NULL
for (x in seq_along(df)) {
data <- df[[x]]
if (!is.null(ID)){
data$barcode <- paste0(samples[x], "_", ID[x], "_", data$barcode)
} else {
data$barcode <- paste0(samples[x], "_", data$barcode)
}
out[[x]] <- data }
return(out)
}
#Removing barcodes with NA recovered
removingNA <- function(final) {
for(i in seq_along(final)) {
final[[i]] <- na.omit(final[[i]])}
return(final)
}
#Removing barcodes with > 2 clones recovered
removingMulti <- function(final){
for(i in seq_along(final)) {
final[[i]] <- filter(final[[i]], !grepl(";",CTnt))}
return(final)
}
#Removing extra clonotypes in barcodes with > 2 productive contigs
#' @import dplyr
filteringMulti <- function(x) {
x <- x %>%
group_by(barcode, chain) %>%
slice_max(n = 1, order_by = reads)
table <- subset(as.data.frame(table(x$barcode)), Freq > 2)
if (nrow(table) > 0) {
barcodes <- as.character(unique(table$Var1))
multichain <- NULL
for (j in seq_along(barcodes)) {
chain <- x[x$barcode == barcodes[j],] %>%
group_by(barcode) %>% top_n(n = 2, wt = reads)
multichain <- rbind(multichain, chain)
}
x <- subset(x, barcode %!in% barcodes)
x <- rbind(x, multichain)
}
return(x)
}
#Filtering NA contigs out of single-cell expression object
#' @import dplyr
#' @importFrom SingleCellExperiment colData
filteringNA <- function(sc) {
meta <- grabMeta(sc)
evalNA <- data.frame(meta[,"cloneType"])
colnames(evalNA) <- "indicator"
evalNA <- evalNA %>%
transmute(indicator = ifelse(is.na(indicator), 0, 1))
rownames(evalNA) <- rownames(meta)
if (inherits(x=sc, what ="cell_data_set")){
colData(sc)[["evalNA"]]<-evalNA
return(sc[, !is.na(sc$cloneType)])
}else{
col.name <- names(evalNA) %||% colnames(evalNA)
sc[[col.name]] <- evalNA
sc <- subset(sc, cloneType != 0)
return(sc)
}
}
#Calculating diversity using Vegan R package
#' @importFrom vegan diversity estimateR
diversityCall <- function(data) {
w <- diversity(data[,"Freq"], index = "shannon")
x <- diversity(data[,"Freq"], index = "invsimpson")
y <- estimateR(data[,"Freq"])[2] #Chao
z <- estimateR(data[,"Freq"])[4] #ACE
z2 <- diversity(data[,"Freq"], index = "shannon")/log(length(data[,"Freq"]))
out <- c(w,x,y,z, z2)
return(out)
}
#Organizing list of contigs for visualization
parseContigs <- function(df, i, names, cloneCall) {
data <- df[[i]]
data1 <- data %>% group_by(data[,cloneCall]) %>%
summarise(Abundance=n())
colnames(data1)[1] <- cloneCall
data1$values <- names[i]
return(data1)
}
#Calculate the Morisita Index for Overlap Analysis
#' @author Massimo Andreatta, Nick Borcherding
morisitaIndex <- function(df, length, cloneCall, coef_matrix) {
for (i in seq_along(length)){
df.i <- df[[i]]
df.i <- data.frame(table(df.i[,cloneCall]))
colnames(df.i) <- c(cloneCall, 'Count')
df.i[,2] <- as.numeric(df.i[,2])
for (j in seq_along(length)){
if (i >= j){ next }
else { df.j <- df[[j]]
df.j <- data.frame(table(df.j[,cloneCall]))
colnames(df.j) <- c(cloneCall, 'Count')
df.j[,2] <- as.numeric(df.j[,2])
merged <- merge(df.i, df.j, by = cloneCall, all = TRUE)
merged[is.na(merged)] <- 0
X <- sum(merged[,2])
Y <- sum(merged[,3])
sum.df.i <- sum(df.i[,2]^2)
sum.df.j <- sum(df.j[,2]^2)
num <- 2 * sum(merged[, 2] * merged[, 3])
den <- ((sum.df.i / (X^2) + sum.df.j / (Y^2)) * X * Y)
coef.i.j <- num/den
coef_matrix[i,j] <- coef.i.j
}
}
}
return(coef_matrix)
}
#Calculate the Jaccard Similarity Index for Overlap Analysis
jaccardIndex <- function(df, length, cloneCall, coef_matrix) {
for (i in seq_along(length)){
df.i <- df[[i]]
df.i <- df.i[,c("barcode",cloneCall)]
df.i_unique <- df.i[!duplicated(df.i[,cloneCall]),]
for (j in seq_along(length)){
if (i >= j){ next }
else {
df.j <- df[[j]]
df.j <- df.j[,c("barcode",cloneCall)]
df.j_unique <- df.j[!duplicated(df.j[,cloneCall]),]
overlap <- length(intersect(df.i_unique[,cloneCall],
df.j_unique[,cloneCall]))
coef_matrix[i,j] <-
overlap/(sum(length(df.i_unique[,cloneCall]),
length(df.j_unique[,cloneCall]))-overlap)
}
}
}
return(coef_matrix)
}
rawIndex <- function(df, length, cloneCall, coef_matrix) {
for (i in seq_along(length)){
df.i <- df[[i]]
df.i <- df.i[,c("barcode",cloneCall)]
df.i_unique <- df.i[!duplicated(df.i[,cloneCall]),]
for (j in seq_along(length)){
if (i >= j){ next }
else {
df.j <- df[[j]]
df.j <- df.j[,c("barcode",cloneCall)]
df.j_unique <- df.j[!duplicated(df.j[,cloneCall]),]
overlap <- length(intersect(df.i_unique[,cloneCall],
df.j_unique[,cloneCall]))
coef_matrix[i,j] <- overlap
}
}
}
return(coef_matrix)
}
#Calculate the Overlap Coefficient for Overlap Analysis
#' @author Nick Bormann, Nick Borcherding
overlapIndex <- function(df, length, cloneCall, coef_matrix) {
for (i in seq_along(length)){
df.i <- df[[i]]
df.i <- df.i[,c("barcode",cloneCall)]
df.i_unique <- df.i[!duplicated(df.i[,cloneCall]),]
for (j in seq_along(length)){
if (i >= j){ next }
else { df.j <- df[[j]]
df.j <- df.j[,c("barcode",cloneCall)]
df.j_unique <- df.j[!duplicated(df.j[,cloneCall]),]
overlap <- length(intersect(df.i_unique[,cloneCall],
df.j_unique[,cloneCall]))
coef_matrix[i,j] <-
overlap/min(length(df.i_unique[,cloneCall]),
length(df.j_unique[,cloneCall])) } } }
return(coef_matrix)
}
# This suppressing outputs for using dput()
quiet <- function(x) {
sink(tempfile())
on.exit(sink())
invisible(force(x))
}
# This is to help sort the type of clonotype data to use
theCall <- function(x) {
if (x %in% c("CTnt", "CTgene", "CTaa", "CTstrict")) {
x <- x
}else if (x == "gene" | x == "genes") {
x <- "CTgene"
} else if(x == "nt" | x == "nucleotide") {
x <- "CTnt"
} else if (x == "aa" | x == "amino") {
x <- "CTaa"
} else if (x == "gene+nt" | x == "strict") {
x <- "CTstrict"
}
return(x)
}
# Assigning positions for TCR contig data
#' @author Gloria Kraus, Nick Bormann, Nick Borcherding
parseTCR <- function(Con.df, unique_df, data2) {
for (y in seq_along(unique_df)){
barcode.i <- Con.df$barcode[y]
location.i <- which(barcode.i == data2$barcode)
for (z in seq_along(location.i)) {
where.chain <- data2[location.i[z],"chain"]
if (where.chain == "TRA") {
if(is.na(Con.df[y,"TCR1"])) {
Con.df[y,tcr1_lines] <- data2[location.i[z],data1_lines]
} else {
Con.df[y,tcr1_lines] <- paste(Con.df[y, tcr1_lines],
data2[location.i[z],data1_lines],sep=";")
}
} else if (where.chain == "TRB") {
if(is.na(Con.df[y,"TCR2"])) {
Con.df[y,tcr2_lines] <- data2[location.i[z],data2_lines]
} else {
Con.df[y,tcr2_lines] <- paste(Con.df[y, tcr2_lines],
data2[location.i[z],data2_lines],sep=";")
}
}
}
}
return(Con.df)
}
#Assigning positions for BCR contig data
#Now assumes lambda over kappa in the context of only 2 light chains
#' @author Gloria Kraus, Nick Bormann, Nick Borcherding
parseBCR <- function (Con.df, unique_df, data2) {
for (y in seq_along(unique_df)) {
barcode.i <- Con.df$barcode[y]
location.i <- which(barcode.i == data2$barcode)
if (length(location.i) == 2) {
if (!is.na(data2[location.i[1], c("IGHct")])) {
Con.df[y, heavy_lines] <- data2[location.i[1], h_lines]
if(is.na(data2[location.i[2], c("IGHct")])) {
if (!is.na(data2[location.i[2], c("IGLct")])) {
Con.df[y, light_lines] <- data2[location.i[2], l_lines]
} else if(!is.na(data2[location.i[2], c("IGKct")])) {
Con.df[y, light_lines] <- data2[location.i[2], k_lines]
}
}
} else if (!is.na(data2[location.i[2], c("IGHct")])) {
Con.df[y, heavy_lines] <- data2[location.i[2], h_lines]
if(is.na(data2[location.i[1], c("IGHct")])) {
if (!is.na(data2[location.i[1], c("IGLct")])) {
Con.df[y, light_lines] <- data2[location.i[1], l_lines]
} else if(!is.na(data2[location.i[1], c("IGKct")])) {
Con.df[y, light_lines] <- data2[location.i[1], k_lines]
}
}
}
}else if (length(location.i) == 1) {
chain.i <- data2$chain[location.i]
if (chain.i == "IGH") {
Con.df[y, heavy_lines] <- data2[location.i[1], h_lines]
}
else if (chain.i == "IGL") {
Con.df[y, light_lines] <- data2[location.i[1], l_lines]
}
else {
Con.df[y, light_lines] <- data2[location.i[1], k_lines]
}
}
}
return(Con.df)
}
#Assign T/B cell chains and celltypes for combineTCR() and lengthContig
cellT <- function(cells) {
if (cells == "T-AB") {
chain1 <- "TRA"
chain2 <- "TRB"
cellType <- "T-AB"
} else if (cells == "T-GD") {
chain1 <- "TRD"
chain2 <- "TRG"
cellType <- "T-GD"
} else if (cells == "B") {
chain1 <- "IGH"
chain2 <- "IGL"
cellType <- "B"
}
return(list(chain1, chain2, cellType))
}
#Producing a data frame to visualize for lengthContig()
lengthDF <- function(df, cloneCall, chain, group, c1, c2){
Con.df <- NULL
names <- names(df)
if (chain == "both") {
for (i in seq_along(df)) {
length <- nchar(gsub("_", "", df[[i]][,cloneCall]))
val <- df[[i]][,cloneCall]
if (!is.null(group)) {
cols <- df[[i]][,group]
data <- na.omit(data.frame(length, val, cols, names[i]))
colnames(data) <- c("length", "CT", group, "values")
Con.df<- rbind.data.frame(Con.df, data)
} else {
data <- na.omit(data.frame(length, val, names[i]))
colnames(data) <- c("length", "CT", "values")
Con.df<- rbind.data.frame(Con.df, data) }}
} else if (chain != "both") {
for (x in seq_along(df)) {
df[[x]] <- off.the.chain(df[[x]], chain, cloneCall)
strings <- df[[x]][,cloneCall]
val1 <- strings
for (i in seq_along(val1)) {
if (grepl(";", val1[i]) == TRUE) {
val1[i] <- str_split(val1[i], ";", simplify = TRUE)[1]
} else { next() } }
chain1 <- nchar(val1)
if (!is.null(group)) {
cols1 <- df[[x]][,group]
data1 <- data.frame(chain1, val1, names[x], c1, cols1)
colnames(data1)<-c("length","CT","values","chain",group)
}else if (is.null(group)){
data1 <- data.frame(chain1, val1, names[x], c1)
colnames(data1) <- c("length", "CT", "values", "chain")
data <- na.omit(data1)
data <- subset(data, CT != "NA" & CT != "")
Con.df<- rbind.data.frame(Con.df, data) }}
}
return(Con.df)}
#General combination of nucleotide, aa, and gene sequences for T/B cells
assignCT <- function(cellType, Con.df) {
if (cellType %in% c("T-AB", "T-GD")) {
Con.df$CTgene <- paste(Con.df$TCR1, Con.df$TCR2, sep="_")
Con.df$CTnt <- paste(Con.df$cdr3_nt1, Con.df$cdr3_nt2, sep="_")
Con.df$CTaa <- paste(Con.df$cdr3_aa1, Con.df$cdr3_aa2, sep="_")
Con.df$CTstrict <- paste(Con.df$TCR1, Con.df$cdr3_nt1,
Con.df$TCR2, Con.df$cdr3_nt2, sep="_")
} else {
Con.df$CTgene <- paste(Con.df$IGH, Con.df$IGLC, sep="_")
Con.df$CTnt <- paste(Con.df$cdr3_nt1, Con.df$cdr3_nt2, sep="_")
Con.df$CTaa <- paste(Con.df$cdr3_aa1, Con.df$cdr3_aa2, sep="_") }
return(Con.df)
}
#Sorting the V/D/J/C gene sequences for T and B cells
#' @importFrom stringr str_c str_replace_na
#' @importFrom dplyr bind_rows
makeGenes <- function(cellType, data2, chain1, chain2) {
if(cellType %in% c("T-AB", "T-GD")) {
data2 <- data2 %>%
mutate(TCR1 = ifelse(chain == chain1,
str_c(str_replace_na(v_gene), str_replace_na(j_gene), str_replace_na(c_gene), sep = "."), NA)) %>%
mutate(TCR2 = ifelse(chain == chain2,
str_c(str_replace_na(v_gene), str_replace_na(d_gene), str_replace_na(j_gene), str_replace_na(c_gene), sep = "."), NA))
}
else {
heavy <- data2[data2$chain == "IGH",] %>%
mutate(IGHct = str_c(str_replace_na(v_gene), str_replace_na(d_gene), str_replace_na(j_gene), str_replace_na(c_gene), sep = "."))
kappa <- data2[data2$chain == "IGK",] %>%
mutate(IGKct = str_c(str_replace_na(v_gene), str_replace_na(j_gene), str_replace_na(c_gene), sep = "."))
lambda <- data2[data2$chain == "IGL",] %>%
mutate(IGLct = str_c(str_replace_na(v_gene), str_replace_na(j_gene), str_replace_na(c_gene), sep = "."))
data2 <- bind_rows(heavy, kappa, lambda)
}
return(data2)
}
short.check <- function(df, cloneCall) {
min <- c()
for (x in seq_along(df)) {
min.tmp <- length(which(!is.na(unique(df[[x]][,cloneCall]))))
min <- c(min.tmp, min)
}
min <- min(min)
return(min)
}
select.gene <- function(df, chain, gene, label) {
if (chain %in% c("TRB", "TRG", "IGH")) {
gene <- unname(c(V = 1, D = 2, J = 3, C = 4)[gene])
} else {
gene <- unname(c(V = 1, J = 2, C = 3)[gene])
}
if (ncol(str_split(df[,"CTgene"], "_", simplify = TRUE)) == 1) {
C1 <- str_split(df[,"CTgene"], "_", simplify = TRUE)[,1]
C1 <- str_split(C1, "[.]", simplify = TRUE)[,gene]
df$C1 <- C1
x <- "C1"
} else {
C1 <- str_split(df[,"CTgene"], "_", simplify = TRUE)[,1]
C1 <- str_split(C1, "[.]", simplify = TRUE)[,gene]
C2 <- str_split(df[,"CTgene"], "_", simplify = TRUE)[,2]
C2 <- str_split(C2, "[.]", simplify = TRUE)[,gene]
df$C1 <- C1
df$C2 <- C2
if (chain %in% c("TRA", "TRD", "IGH")) {
x <- "C1"}
else if (chain %in% c("TRB", "TRG", "IGL")) {
x <- "C2"}
}
return(df)
}
#' Examining the clonal homeostasis
#'
#' This function calculates the space occupied by clonotype proportions.
#' The grouping of these clonotypes is based on the parameter cloneTypes,
#' at default, cloneTypes will group the clonotypes into bins of Rare = 0
#' to 0.0001, Small = 0.0001 to 0.001, etc. To adjust the proportions,
#' change the number or labeling of the cloneTypes paramter. If a matrix
#' output for the data is preferred, set exportTable = TRUE.
#'
#' @examples
#' #Making combined contig data
#' x <- contig_list
#' combined <- combineTCR(x, rep(c("PX", "PY", "PZ"), each=2),
#' rep(c("P", "T"), 3), cells ="T-AB")
#' clonalHomeostasis(combined, cloneCall = "gene")
#'
#' @param df The product of combineTCR(), combineBCR(), expression2List(), or combineExpression().
#' @param cloneTypes The cutpoints of the proportions.
#' @param cloneCall How to call the clonotype - VDJC gene (gene),
#' CDR3 nucleotide (nt), CDR3 amino acid (aa), or
#' VDJC gene + CDR3 nucleotide (strict).
#' @param chain indicate if both or a specific chain should be used -
#' e.g. "both", "TRA", "TRG", "IGH", "IGL"
#' @param exportTable Exports a table of the data into the global
#' environment in addition to the visualization
#' @import ggplot2
#' @importFrom stringr str_split
#' @importFrom reshape2 melt
#' @export
#' @return ggplot of the space occupied by the specific proportion of clonotypes
clonalHomeostasis_m <- function(df, cloneTypes = c(Rare = .0001, Small = .001,
Medium = .01, Large = .1, Hyperexpanded = 1),
cloneCall = "strict", chain = "both",
exportTable = FALSE) {
cloneTypes <- c(cloneTypes)
df <- list.input.return(df)
cloneCall <- theCall(cloneCall)
df <- checkList(df)
df <- checkBlanks(df, cloneCall)
mat <- matrix(0, length(df), length(cloneTypes) - 1,
dimnames = list(names(df),
names(cloneTypes)[-1]))
if (chain != "both") {
for (x in seq_along(df)) {
df[[x]] <- off.the.chain(df[[x]], chain, cloneCall)
}
}
df <- lapply(df, '[[', cloneCall)
df <- lapply(df, na.omit)
fun <- function(x) { table(x)/length(x) }
df <- lapply(df, fun)
for (i in 2:length(cloneTypes)) {
mat[,i-1] <- vapply(df, function (x) sum(x[x > cloneTypes[i-1] & x <=
cloneTypes[i]]), FUN.VALUE = numeric(1))
colnames(mat)[i-1] <- paste0(names(cloneTypes[i]), ' (',
cloneTypes[i-1], ' < X <= ',
cloneTypes[i], ')') }
if (exportTable == TRUE) { return(mat) }
mat_melt <- melt(mat)
col <- length(unique(mat_melt$Var2))
order_mat <- mat_melt %>%
filter(Var2 == "Small (1e-04 < X <= 0.001)") %>%
arrange(value, desc = TRUE)
o <- order_mat$Var1
mat_melt <- mat_melt[mat_melt$Var1 %in% o,]
# mat_melt$Var1 <- factor(mat_melt$Var1, levels = o)
plot <- ggplot(mat_melt, aes(x=as.factor(Var1), y=value, fill=Var2)) +
geom_bar(stat = "identity", position="fill",
color = "black", lwd= 0.25) +
scale_fill_manual(name = "Clonotype Group",
values = c( "#8c6bb1","#41b6c4", "#fec44f","#ce1256" )) +
xlab("Samples") +
ylab("Relative Abundance") +
theme_void() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, size = 20),axis.text.y = element_text(size = 20), legend.text = element_text(size = 20), axis.title = element_text(size=20), axis.title.y = element_text(angle = 90))
return(plot)
}if(params$accessibility == "unlock"){
p <- SCpubr::do_DimPlot(sample = s, reduction = "umap", label = FALSE, repel = TRUE, label.color = "white",
legend.position = "none", split.by = "cloneType", font.size = 12,pt.size = 1, ncol = 5, colors.use = colors_clonotypes)
print(p)
p <- SCpubr::do_DimPlot(sample = s, reduction = "umap", label = FALSE, repel = TRUE, label.color = "white",
legend.position = "none", group.by = "cloneType", font.size = 12,pt.size = 1, ncol = 5, colors.use = colors_clonotypes)
DT::datatable(p$data,
caption = ("Extended Data figure 10b"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))
} else{
print("Please request access to BCR-Seq data.")
}if(params$accessibility == "unlock"){
combined_B <- lapply(combined, function(x) {x[x$barcode %in% colnames(s),] })
merge_combined_B <- bind_rows(combined_B, .id = "sample")
c_pCR <- combined_B[c("NeoBCC008",
"NeoBCC007",
"NeoBCC012",
"NeoBCC017" )]
p3 <- clonalHomeostasis_m(c_pCR, chain = "both") +
theme(axis.text.x=element_blank())
p3$labels$x <- "pCR"
c_nonpCR <- combined_B[c(
"NeoBCC010",
"NeoBCC014",
"NeoBCC011",
"NeoBCC004",
"NeoBCC005",
"NeoBCC018",
"NeoBCC006",
"NeoBCC015"
)]
p4 <- clonalHomeostasis_m(c_nonpCR, chain = "both") +
theme(axis.text.x=element_blank())
p4$labels$y <- " "
p4$labels$x <- "non-pCR"
p <- p3 + p4 + plot_layout(ncol = 2, widths = c(1,2), guides = "collect")
print(p)
DT::datatable(rbind(p3$data, p4$data),
caption = ("Extended Data figure 10b"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))
} else{
print("Please request access to BCR-Seq data.")
}if(params$accessibility == "unlock"){
q1 <- quantContig(combined_B, cloneCall="gene+nt", exportTable = TRUE)
q1$Response <- "non-pCR"
q1$patient <- q1$values
q1$Response[grepl("NeoBCC007|NeoBCC008|NeoBCC012|NeoBCC017", q1$values)] <- "pCR"
q1$Response <- factor(q1$Response, levels = c("pCR", "non-pCR"))
q1$percent <- q1$contigs / q1$total * 100
g1 <- ggpaired(q1, x = "Response", y = "total", id="patient", color = "black", fill = "Response" , palette= c("#66c2a4", "#ccece6"), point.size = 2, xlab = "Response", ylab = " Number of \n BCR clonality") + stat_compare_means( method = "wilcox.test", paired = FALSE, label.y = 4500, hide.ns = FALSE, label = "p.format", label.x.npc = "center", bracket.size = 1, comparison = list(c("CR", "SD"), c("SD", "PR"), c("CR", "PR")), size = 8) +theme(text = element_text(size = 15), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + ylim(c(0, 5000)) & NoLegend()
DT::datatable(g1$data,
caption = ("Extended Data Figure 10c.a-c"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))
g2 <- ggpaired(q1, x = "Response", y = "percent", id="patient", color = "black", fill = "Response" , palette= c("#66c2a4", "#ccece6"), point.size = 2, xlab = "Response", ylab = " Relative % \n of unique clonotypes") + stat_compare_means( method = "wilcox.test", paired = FALSE, label.y = 100, hide.ns = FALSE, label = "p.format", label.x.npc = "center", bracket.size = 1, comparison = list(c("CR", "SD"), c("SD", "PR"), c("CR", "PR")), size = 8) +theme(text = element_text(size = 15), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + ylim(c(0, 110)) & NoLegend()
g3 <- ggpaired(q1, x = "Response", y = "contigs", id="patient", color = "black", fill = "Response" , palette= c("#66c2a4", "#ccece6"), point.size = 2, xlab = "Response", ylab = " Number of \n unique clonotypes") + stat_compare_means( method = "wilcox.test", paired = FALSE, label.y = 900, hide.ns = FALSE, label = "p.format", label.x.npc = "center", bracket.size = 1, comparison = list(c("CR", "SD"), c("SD", "PR"), c("CR", "PR")), size = 8) +theme(text = element_text(size = 15), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + ylim(c(0, 1000)) & NoLegend()
norm_entropy <- function(observations) {
observations <- observations[!is.na(observations)]
p <- as.numeric(table(observations)) / length(observations)
n <- length(p)
if (n == 1) {
return(0)
}
h <- -sum(p*log(p))
h_max <- log(length(observations))
return(h / h_max)
}
clonality <- rep(NA, length(names(combined_B)))
names(clonality) <- names(combined_B)
for (p in names(combined_B)){
clonality[p] <- norm_entropy(combined_B[[p]]$CTaa)
}
clonality <- as.data.frame(clonality)
clonality$Response <- "non-pCR"
clonality$Response[grepl("NeoBCC007|NeoBCC008|NeoBCC012|NeoBCC017", rownames(clonality))] <- "pCR"
clonality$Response <- factor(clonality$Response, levels = c("pCR", "non-pCR"))
clonality$patient <- rownames(clonality)
g4 <- ggpaired(clonality, x = "Response", y = "clonality", id="patient", color = "black", fill = "Response" , palette= c("#66c2a4", "#ccece6"), point.size = 2, xlab = "Response", ylab = "Repertoire clonality") + stat_compare_means( method = "wilcox.test", paired = FALSE, label.y = 1, hide.ns = FALSE, label = "p.format", label.x.npc = "center", bracket.size = 1, comparison = list(c("CR", "SD"), c("SD", "PR"), c("CR", "PR")), size = 8) +theme(text = element_text(size = 15), axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + ylim(c(0, 1.10)) & NoLegend()
DT::datatable(g4$data,
caption = ("Extended Data Figure 10c.d"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))
g <- g1 + g3 +g2 + g4 + plot_layout(ncol=4, guides="collect")
print(g)
} else{
print("Please request access to BCR-Seq data.")
}if(params$accessibility == "unlock"){
s$cloneType <- factor(s$cloneType, levels = c("Hyperexpanded (0.1 < X <= 1)",
"Large (0.01 < X <= 0.1)",
"Medium (0.001 < X <= 0.01)",
"Small (1e-04 < X <= 0.001)" ))
genes <- list(
"Hyperexpanded" = c("HSBP1", "CSTB","CAP1", "SKP1"),
"Large" = c( "DPEP1","P2RX1"),
"Medium" = c("RAPGEF1", "FBXW7", "FAM30A"),
"Small" = c("SOCS3", "UQCRC2","ITM2B", "DDIT4")
)
p <- SCpubr::do_DotPlot(sample = s,
features = genes,
group.by = "cloneType",
font.size = 30,
legend.length = 20,
legend.type = "colorbar",
dot.scale = 10,
colors.use = c('#fee08b',"#ffffbf", "#f7f7f7","#c2a5cf", "#542788"))
print(p)
DT::datatable(p$data,
caption = ("EDF10d"),
extensions = 'Buttons',
options = list( dom = 'Bfrtip',
buttons = c( 'csv', 'excel')))
} else{
print("Please request access to BCR-Seq data.")
}## R version 4.3.0 (2023-04-21)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8
## [6] LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Europe/Vienna
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] stringr_1.5.1 msigdbr_7.5.1 DOSE_3.26.2 org.Hs.eg.db_3.17.0 AnnotationDbi_1.62.2 IRanges_2.34.1
## [7] S4Vectors_0.38.2 Biobase_2.60.0 BiocGenerics_0.46.0 clusterProfiler_4.8.3 enrichplot_1.20.3 scales_1.3.0
## [13] RColorBrewer_1.1-3 ggnewscale_0.4.10 tidyr_1.3.1 scRepertoire_1.10.1 dittoSeq_1.12.2 canceRbits_0.1.6
## [19] ggpubr_0.6.0.999 ggplot2_3.5.1 viridis_0.6.5 viridisLite_0.4.2 reshape2_1.4.4 tibble_3.2.1
## [25] SCpubr_2.0.2 DT_0.32 patchwork_1.2.0 dplyr_1.1.4 Seurat_5.0.3 SeuratObject_5.0.1
## [31] sp_2.1-3
##
## loaded via a namespace (and not attached):
## [1] fs_1.6.4 matrixStats_1.2.0 spatstat.sparse_3.0-3 bitops_1.0-7 HDO.db_0.99.1
## [6] httr_1.4.7 doParallel_1.0.17 tools_4.3.0 sctransform_0.4.1 backports_1.4.1
## [11] utf8_1.2.4 R6_2.5.1 vegan_2.6-4 lazyeval_0.2.2 uwot_0.1.16
## [16] mgcv_1.9-1 permute_0.9-7 withr_3.0.0 gridExtra_2.3 progressr_0.14.0
## [21] cli_3.6.2 spatstat.explore_3.2-7 fastDummies_1.7.3 scatterpie_0.2.1 isoband_0.2.7
## [26] labeling_0.4.3 sass_0.4.9 spatstat.data_3.0-4 ggridges_0.5.6 pbapply_1.7-2
## [31] yulab.utils_0.1.4 gson_0.1.0 stringdist_0.9.12 parallelly_1.37.1 limma_3.56.2
## [36] RSQLite_2.3.5 VGAM_1.1-10 rstudioapi_0.16.0 generics_0.1.3 gridGraphics_0.5-1
## [41] ica_1.0-3 spatstat.random_3.2-3 crosstalk_1.2.1 car_3.1-2 GO.db_3.17.0
## [46] Matrix_1.6-5 ggbeeswarm_0.7.2 fansi_1.0.6 abind_1.4-5 lifecycle_1.0.4
## [51] edgeR_3.42.4 yaml_2.3.8 carData_3.0-5 SummarizedExperiment_1.30.2 qvalue_2.32.0
## [56] Rtsne_0.17 blob_1.2.4 grid_4.3.0 promises_1.2.1 crayon_1.5.2
## [61] miniUI_0.1.1.1 lattice_0.22-6 cowplot_1.1.3 KEGGREST_1.40.1 pillar_1.9.0
## [66] knitr_1.45 fgsea_1.26.0 GenomicRanges_1.52.1 future.apply_1.11.1 codetools_0.2-19
## [71] fastmatch_1.1-4 leiden_0.4.3.1 glue_1.7.0 downloader_0.4 ggfun_0.1.5
## [76] data.table_1.15.2 treeio_1.24.3 vctrs_0.6.5 png_0.1-8 spam_2.10-0
## [81] gtable_0.3.5 assertthat_0.2.1 cachem_1.1.0 xfun_0.43 S4Arrays_1.0.6
## [86] mime_0.12 tidygraph_1.3.1 survival_3.5-8 DElegate_1.2.1 SingleCellExperiment_1.22.0
## [91] pheatmap_1.0.12 iterators_1.0.14 fitdistrplus_1.1-11 ROCR_1.0-11 nlme_3.1-164
## [96] ggtree_3.13.0.001 bit64_4.0.5 RcppAnnoy_0.0.22 evd_2.3-6.1 GenomeInfoDb_1.36.4
## [101] bslib_0.6.2 irlba_2.3.5.1 vipor_0.4.7 KernSmooth_2.23-22 DBI_1.2.2
## [106] colorspace_2.1-0 ggrastr_1.0.2 tidyselect_1.2.1 bit_4.0.5 compiler_4.3.0
## [111] SparseM_1.81 DelayedArray_0.26.7 plotly_4.10.4 shadowtext_0.1.3 lmtest_0.9-40
## [116] digest_0.6.35 goftest_1.2-3 spatstat.utils_3.0-4 rmarkdown_2.26 XVector_0.40.0
## [121] htmltools_0.5.8 pkgconfig_2.0.3 sparseMatrixStats_1.12.2 MatrixGenerics_1.12.3 highr_0.10
## [126] fastmap_1.2.0 rlang_1.1.4 htmlwidgets_1.6.4 shiny_1.8.1 farver_2.1.2
## [131] jquerylib_0.1.4 zoo_1.8-12 jsonlite_1.8.8 BiocParallel_1.34.2 GOSemSim_2.26.1
## [136] RCurl_1.98-1.14 magrittr_2.0.3 GenomeInfoDbData_1.2.10 ggplotify_0.1.2 dotCall64_1.1-1
## [141] munsell_0.5.1 Rcpp_1.0.12 evmix_2.12 babelgene_22.9 ape_5.8
## [146] reticulate_1.35.0 truncdist_1.0-2 stringi_1.8.4 ggalluvial_0.12.5 ggraph_2.2.1
## [151] zlibbioc_1.46.0 MASS_7.3-60.0.1 plyr_1.8.9 parallel_4.3.0 listenv_0.9.1
## [156] ggrepel_0.9.5 forcats_1.0.0 deldir_2.0-4 Biostrings_2.68.1 graphlayouts_1.1.1
## [161] splines_4.3.0 tensor_1.5 locfit_1.5-9.9 igraph_2.0.3 spatstat.geom_3.2-9
## [166] cubature_2.1.0 ggsignif_0.6.4 RcppHNSW_0.6.0 evaluate_0.23 foreach_1.5.2
## [171] tweenr_2.0.3 httpuv_1.6.15 RANN_2.6.1 purrr_1.0.2 polyclip_1.10-6
## [176] future_1.33.2 scattermore_1.2 ggforce_0.4.2 broom_1.0.5 xtable_1.8-4
## [181] tidytree_0.4.6 RSpectra_0.16-1 rstatix_0.7.2 later_1.3.2 gsl_2.1-8
## [186] aplot_0.2.3 beeswarm_0.4.0 memoise_2.0.1 cluster_2.1.6 powerTCR_1.20.0
## [191] globals_0.16.3